#### COMMUNE-LEVEL ANALYSES #####

library(lfe)
library(rdrobust)
library(dplyr)
library(data.table)

library(stringr)
library(foreign)


library(sf)
library(rgdal)
library(ggplot2)
library(xtable)


rm(list=ls())
gc()
graphics.off()

#Set working directory
setwd("..")

#Load commune-level dataset
load("./data/data_longrun_commune.RData")

##### TABLE A.4 Summary statistics - communes ####

vars <- c("pop1806","pop1881","area","receipts1881","distpref","distspref","distbrig","distroads","distriver","distforest","altmax","rugged","wheat","polsoc")

tab <- data.frame(var=vars, mean=NA, median=NA,min=NA,max=NA,sd=NA)
for(i in 1:length(vars)){
  tab[i,-1] <- sapply(c("mean","median","min","max","sd"), do.call, args = list(dat[!is.na(get(vars[i])),get(vars[i])]))
}

tab <- as.data.table(tab)
tab <- t(tab) %>% as.data.table
names(tab) <- tab[1,] %>% as.character
tab <- tab[-1,]

tab <- tab[,lapply(.SD,as.numeric),.SDcols = pop1806:polsoc]

tab[,`Population 1806`:=round(pop1806,0)]
tab[,`Population 1881`:=round(pop1881,0)]
tab[,`Area`:=round(area,0)]
tab[,`Direct tax receipts 1881`:=round(receipts1881,0)]
tab[,`Dist. dept capital`:=round(distpref,0)]
tab[,`Dist. arrondt capital`:=round(distspref,0)]
tab[,`Dist. brigade`:=round(distbrig,0)]
tab[,`Dist. roads`:=round(distroads,0)]
tab[,`Dist. river`:=round(distriver,0)]
tab[,`Dist. forest`:=round(distforest,0)]
tab[,`Altitude`:=round(altmax,0)]
tab[,`Ruggedness`:=round(rugged,0)]
tab[,`Wheat agricultural suitability`:=round(wheat,0)]
tab[,`Political societies (1789-1794)`:=round(polsoc,2)]

out <- tab[,lapply(.SD, as.character),.SDcols=`Population 1806`:`Political societies (1789-1794)`]
out <- t(out) %>% as.data.frame
names(out) <- c("Mean","Median","Min","Max","Std. Dev.")

N <- lapply(dat[,..vars], FUN=function(x){length(x[!is.na(x)])}) %>% unlist %>% as.vector
length(vars)
out <- cbind(N, out)

print(xtable(out),
      include.rownames = T, 
      include.colnames = T,
      table.placement = 't!',
      type = "latex",
      only.contents = T,
      booktabs = F,
      sanitize.text.function = identity,
      file = "tables/summary_stats_com.tex")


### FIGURE A.20: regression ###

controls <- "log(pop1806)+log(maxpopcant1806)+log(area)+cheflieu+pref+souspref+prefcant+sousprefcant+distpref+distspref+log(distroads+1)+log(1+distforest)+log(1+rugged)+wheat+areacant+x*y+distparis"

plotcoef_longrun_fisc <- function(depvars, dta=dat, main=""){
  coef <- list()
  fes <- c("0","code_dept","arrs")
  ind <- 1
  for(i in 1:length(depvars)){
    for(j in 1:length(fes)){
      if(grepl("area",depvars[i])){
        conts <- gsub("log\\(area\\)\\+","",controls)
      }
      if(depvars[i]!="receiptsarea"){
        conts <- controls
      }
      form <- as.formula(paste(depvars[i]," ~ cad +",conts,"|",fes[j],"|0|code_dept"))
      f <- felm(form, data=dta[get(depvars[i])>0,])
      coef[[ind]] <- summary(f)$coefficients["cad",1:2]
      ind <- ind+1
    }
  }
  
  tab <- do.call(rbind,coef)
  xs <- 1:nrow(tab)
  
  pchs <- c(1:3,1:3,1:3)
  plot(xs,tab[,1], ylim=c(-.05,.15), pch=pchs, xaxt='n', xlab="", ylab="Estimate", main=main)
  abline(h=seq(-.05,.15,.01), col='lightgray')
  points(xs,tab[,1], ylim=c(-.05,.15), pch=pchs)
  segments(xs,tab[,1]+1.96*tab[,2],xs,tab[,1]-1.96*tab[,2])
  abline(h=0, lty=2)
  axis(1, at=c(2,5,8), c("receipts","receipts/area","receipts/pop"))
  abline(v=c(3.5,6.5))
  
  legend('topright', c("No FE","Dept. FE","Arrond. FE"), pch=1:3, bg='white')
  
}

pdf("./figures/coefplot_longrun_fisc_fe.pdf", width=10, height=4)

par(mfrow=c(1,2))

plotcoef_longrun_fisc(c("lreceipts1881","lreceiptsarea1881","lreceiptspop1881"), main="1881")
plotcoef_longrun_fisc(c("lreceipts1911","lreceiptsarea1911","lreceiptspop1911"), main="1911")

dev.off()

### Analysis within treated canton ####

rdplotf <- function(dt=dat, depvar, treatvar="cad", distvar="distborderwd", covs=NULL, fes="pairindex.wd+code_dept", main="", p=1, ylim=c(-.1,.1), bws=2:10, ylab="Estimate", las=0){
  
  depvar <- paste("scale(",depvar,")",sep="")
  
  if(p==1){form <- as.formula(paste(depvar,"~", paste(c(treatvar,covs), collapse="+"), "+ x+y+x*y |", fes, "| 0 | code_dept", sep=""))}
  if(p==2){form <- as.formula(paste(depvar,"~", paste(c(treatvar,covs), collapse="+"), "+ x+y+x*y+x2+y2 |", fes, "| 0 | code_dept", sep=""))}
  res <- list()
  for(b in bws){
    f <- felm(form, data=dt[abs(get(distvar))<b*1000,])
    res[[b]] <- c(f$coefficients[treatvar,1] , f$cse[treatvar])
    }
  res <- do.call(rbind, res) %>% as.data.frame
  names(res) <- c("coef","se")
  plot(bws, res$coef, ylim=ylim, main=main, ylab=ylab, xlab="distance (km)", las=las)
  segments(bws, res$coef+1.96*res$se, bws, res$coef-1.96*res$se)
  abline(h=0, lty=2)
}

### FIGURE 5: Balance centralized v decentralized ##########

balancevars <- c("log(1+wheat)","log(1+rugged)","log(1+distforest)","log(distroads)","log(area)","log(pop1806)","log(1+distpref)","log(1+distspref)","log(areacant)")
labs <- c("wheat","ruggedness","dist. forest","dist. roads","area","pop 1806", "dist. dept capital","dist. arrondt capital","canton area")

pdf("./figures/balance_centrallocal_cad.pdf", height=3)
par(mfrow=c(1,1))
for(i in 1:length(balancevars)){
  if(i<=6){rdplotf(dt=dat,depvar=balancevars[i],  ylim=c(-.1,.1), main=labs[i])}
  if(i>6){rdplotf(dt=dat,depvar=balancevars[i],  ylim=c(-.25,.25), main=labs[i])}
}
dev.off()


### FIGURES 8, A.19, A.20, A.21 #######
controls <- "log(distroads)+log(1+distforest)+log(1+rugged)+log(areacant)+log(1+distpref)+log(1+distspref)"

#Main
lr_res <- function(d, file, p=1, distvar="distborder", fes){
  
  pdf(paste("./figures/",file,".pdf",sep=""), height=3, width=9)
  
  par(mfrow=c(1,2))
  
  depvars <- c("lbasepop","lreceiptspop","llocalreceiptspop","lbasearea","lreceiptsarea","llocalreceiptsarea","lbaseimpotslocauxtot","lreceipts")
  depvars <- c(paste(depvars, 1881, sep=""), paste(depvars, 1911,sep=""))
  labs <- c("tax base pc","receipts pc","local receipts pc","tax base /ha","receipts /ha","local receipts /ha","tax base","receipts")
  labs <- c(paste(labs, 1881), paste(labs, 1911))
  
  for(i in 1:length(depvars)){
    rdplotf(dt=d,depvar=depvars[i], distvar=distvar, treatvar="cad", fes=fes, p=p, main="No controls", bws=2:10, ylim=c(-.15,.15), ylab=labs[i]) 
    rdplotf(dt=d,depvar=depvars[i], distvar=distvar, treatvar="cad", fes=fes, p=p, covs=controls, main="Controls", bws=2:10, ylim=c(-.15,.15), ylab=labs[i]) 
  }
  
  dev.off()
}

lr_res(d=dat, "rd_wd_base_receipts_1881_1911_cad", fes="pairindex.wd+code_dept",distvar="distborderwd")

### FIGURE A.17: Spatial RD map #####

load("./data/maps_cadaster.RData")

ggplot()+ geom_sf(data=mapcad, mapping=aes(colour=as.factor(cad)), size=.5)+
  labs(colour="Treatment")+
  geom_sf(data=deptf[!(deptf$code_dept %in% c(6,27,73,74,67,68,57,9,12)),], fill='transparent')+
  theme_void()
ggsave("./figures/map_rd_wd_sample_10km.jpg", width=7, height=7)

ggplot()+ geom_sf(data=mapcad[mapcad$code_deptnew==24,], mapping=aes(colour=as.factor(cad)))+
  labs(colour="Treatment")+
  geom_sf(data=deptf[deptf$code_dept==24,], fill='transparent')+
  geom_sf(data=bord[grep("^24",bord$communeide),], mapping=aes())+
  theme_void()
ggsave("./figures/map_rd_sample_10km_24.jpg", width=5, height=5)

#### TABLE A.6 Non parametric RD #####

dt <- dat[abs(distborderwd)<10000,]

dt[,ldistroads:=log(distroads)]
dt[,lareacant:=log(areacant)]
dt[,ldistpref:=log(1+distpref)]
dt[,ldistspref:=log(1+distspref)]
dt[,lrugged:=log(1+rugged)]
dt[,ldistforest:=log(1+distforest)]
dt[,ldistparis:=log(1+distparis)]
dt[,lpop1806:=log(pop1806)]
dt[,larea:=log(area)]

depsfe <- model.matrix(~ 1+factor(dt$code_dept))
covs <- c("lpop1806","lrugged","larea","ldistparis","ldistpref","ldistspref","lareacant","ldistroads","ldistforest","x","y","xy")

depvars <- c("lreceiptspop1881","lreceiptspop1911","lreceiptsarea1881","lreceiptsarea1911")
labs <- c("Receipts pc 1881", "Receipts pc 1911","Receipts /ha 1881", "Receipts /ha 1911")

rd <- function(outcome,  dt, w = 3, c = 0, p = 1, fe=T){ # 
  y <- dt[, scale(get(outcome))]
  x <- dt$distborderwd
  if(fe==T){
    out <- rdrobust(y, x=x, c = c, p = p, covs=cbind(dt[,..covs], depsfe), cluster = dt$code_dept)
  }
 if(fe==F){
   out <- rdrobust(y, x=x, c = c, p = p, covs=dt[,..covs], cluster = dt$code_dept)
 }
  h <- out$bws[1, 1]
  data.table(outcome = outcome, 
             est = out$coef[w], 
             se = out$se[w], 
             p =  out$pv[w],
             cis_l = out$ci[w,1], 
             cis_u = out$ci[w,2],
             BW = format(round(h, 0), big.mark = ","),
             eff_N = format(round(sum(out$N_h), 0), big.mark = ","),
             N = format(round(sum(out$N), 0), big.mark = ","),
             mean_dv = mean(y[abs(x) <= h], na.rm=T),
             sd_dv = sd(y[abs(x) <= h], na.rm=T))
}
p_value <- function(p) cut(p, breaks = c(-1, .01, .05, 1), 
                           labels = c("$^{\\!*\\!*}$", "$^{\\!*}$", ""))
tab_rd <- function(r, file = ""){
  r[, p_star := p_value(p)]
  r <- r[, .(outcome, 
             `Coef. ($\\hat{\\tau}$)` = paste0(sprintf("%.2f", est), p_star), 
             `S.E.` = paste0("(", sprintf("%.2f", se), ")"), 
             Bandwidth = paste0("{$", BW, "$}"), 
             `Effective N` = paste0("{$", eff_N, "$}"), 
             `Outcome mean` = sprintf("%.2f", mean_dv), 
             `Outcome SD` = sprintf("%.2f", sd_dv))]
  r <- dcast(melt(r, id.vars = "outcome"), 
             variable ~ factor(outcome, levels = r$outcome))
  print(xtable(r),
        include.rownames = FALSE, 
        include.colnames = FALSE,
        table.placement = 't!',
        type = "latex",
        only.contents = TRUE,
        sanitize.text.function = identity,
        file = file)
}

tab1 <- lapply(depvars, FUN=rd, dt=dt)
r1 <- rbindlist(tab1)
tab_rd(r1, file="./tables/rd_receipts_wd.tex")

### FIGURE A.22 Non parametric RD ########

pdf("./figures/rdplots_receipts_wd.pdf")
for(i in 1:2){
  rdplot(y=dt[,scale(get(depvars[i]))], x=dt$distborderwd, c=0, covs=cbind(dt[,..covs], depsfe), h=as.numeric(gsub(",","",r1$BW[i])), p=1, x.lim=c(-3000,3000), y.lim=c(-1,1), kernel='tri', title=labs[i])
}
dev.off()


#### FIGURE A.9 Nord case study #######

load("./data/data_nordcasestudy.RData")

d <- dat_nord

#1804 annuaire: contri fonciere until 1814
pdf("./figures/nord_plotyear.pdf")

d[mindateref<=1814 & popint<2000,mean(cont/popint,na.rm=T),keyby=year] %>% plot(ylim=c(7,9), type='o', lty=2, ylab="Direct tax/hab.")
d[mindateref>1815 & popint<2000,mean(cont/popint,na.rm=T),keyby=year] %>% lines(type='o')
legend("topleft", c("Cadastered in 1815","Not cadastered in 1815"), lty=2:1)

d[mindateref<=1814 & popint<2000,mean(cont/area,na.rm=T),keyby=year] %>% plot(ylim=c(7,9), type='o', lty=2, ylab="Direct tax/area")
d[mindateref>1814 & popint<2000,mean(cont/area,na.rm=T),keyby=year] %>% lines(type='o')
legend("topleft", c("Cadaster in 1815","No cadaster in 1815"), lty=2:1)

dev.off()




